Geocoded individual transaction data
GIS shapefiles
Note that all longitudes and latitudes should be in WGS1984 format
#pragma nodebook off
#Use nodebook for better reproducibility https://github.com/uoa-eResearch/nodebook
%reload_ext nodebook.ipython
%nodebook disk phase3
# load libraries
import geopandas as gpd # vector data
import pandas as pd # tabular data, loading CSVs
import numpy as np # numeric data
from util import *
import matplotlib # plotting
import contextily as ctx # Used for contextual basemaps
from matplotlib_scalebar.scalebar import ScaleBar # scalebar for plot
import matplotlib.pyplot as plt # plotting
from tqdm.auto import tqdm # progress bars
tqdm.pandas()
import json
from shapely.geometry import Point, shape, LineString, MultiLineString, GeometryCollection, MultiPoint, Polygon # creating points
import requests # web requests
from pandarallel import pandarallel # parallel operations for speed
pandarallel.initialize(progress_bar=True)
plt.rcParams['figure.figsize'] = (20, 20)
pd.set_option('max_columns', None)
INFO: Pandarallel will run on 16 workers. INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
ls()
| name | filesize (MB) | last modified | |
|---|---|---|---|
| 0 | 2013-mb-dataset-Total-New-Zealand-Household.csv | 37.12 | 2014-06-04 10:56:30.000000 |
| 1 | 2013-mb-dataset-Total-New-Zealand-Individual-Part-1.csv | 31.66 | 2014-04-01 10:13:15.197000 |
| 2 | 2013_census_mean_income_by_AU2013.xlsx | 0.59 | 2021-09-07 06:21:24.000000 |
| 3 | 2018-census-electoral-population-meshblock-2020-data.csv | 5.60 | 2021-08-09 00:39:18.000000 |
| 4 | 2018_census_dwellings_by_SA2.xlsx | 0.43 | 2021-07-12 14:30:28.000000 |
| 5 | AC_Special_Housing_Area.zip | 0.29 | 2021-09-02 10:36:16.760000 |
| 6 | AUP_viewshafts.zip | 0.21 | 2021-09-22 10:08:05.233857 |
| 7 | AucklandArea.gpkg | 0.09 | 2021-09-02 10:36:17.220000 |
| 8 | Geocoordinates_Direct_Transit_stops_AKL.xlsx | 0.01 | 2020-10-08 07:52:29.000000 |
| 9 | Individual_part1_totalNZ-wide_format_updated_16-7-20.csv | 36.58 | 2020-07-14 16:12:24.000000 |
| 10 | MASTER_UP_BaseZone_SHP.zip | 66.92 | 2021-07-19 02:23:51.137347 |
| 11 | Modified_Community_Boards_SHP.zip | 1.30 | 2021-07-19 02:16:07.650000 |
| 12 | Transmission_Lines_exTRANSPOWER.zip | 0.50 | 2021-09-22 09:31:01.376235 |
| 13 | area-unit-2013.gdb.zip | 14.21 | 2021-09-02 10:36:16.070000 |
| 14 | kx-nz-state-highway-on-ramps-off-ramps-SHP.zip | 0.14 | 2021-08-25 09:52:06.341291 |
| 15 | lds-nz-coastline-mean-high-water-FGDB.zip | 4.88 | 2021-08-31 16:25:16.250000 |
| 16 | lds-nz-coastlines-and-islands-polygons-topo-150k-FGDB.zip | 4.25 | 2021-08-05 15:57:49.020000 |
| 17 | lds-nz-primary-parcels-CLIPPED-4326.gpkg | 273.35 | 2021-09-02 10:37:32.410000 |
| 18 | lds-nz-primary-parcels-FGDB.zip | 79.03 | 2021-08-12 09:13:27.910000 |
| 19 | lds-nz-railway-centrelines-topo-150k-SHP.zip | 0.87 | 2021-08-31 10:01:46.315326 |
| 20 | lds-nz-road-centrelines-topo-150k-FGDB.zip | 36.06 | 2021-08-31 09:43:07.931704 |
| 21 | lds-nz-street-address-GPKG-CLIPPED.gpkg | 170.23 | 2021-09-02 10:37:19.490000 |
| 22 | lds-nz-street-address-GPKG.zip | 200.88 | 2021-09-02 10:37:26.070000 |
| 23 | meshblock-2013.gdb.zip | 106.57 | 2021-09-02 10:36:59.860000 |
| 24 | meshblock-2018-clipped-generalised.gdb.zip | 32.83 | 2021-09-02 10:36:26.010000 |
| 25 | nz-primary-parcels.gdb.zip | 55.00 | 2021-09-02 10:36:39.020000 |
| 26 | statsnzarea-unit-2013-FGDB.zip | 13.79 | 2021-08-31 16:56:17.150000 |
| 27 | statsnzmeshblock-higher-geographies-2018-generalised-FGDB.zip | 34.54 | 2021-08-05 14:53:54.350000 |
| 28 | statsnzpopulation-by-meshblock-2013-census-FGDB.zip | 82.11 | 2021-07-19 13:53:55.150631 |
| 29 | statsnzstatistical-area-2-higher-geographies-2018-clipped-generalis-FGDB.zip | 10.72 | 2021-08-30 17:12:55.591895 |
Total: 1301.0MB
ls("restricted")
| name | filesize (MB) | last modified | |
|---|---|---|---|
| 0 | QPIDs_Auckland_1990_2020.csv | 17.40 | 2021-07-07 13:09:39.000 |
| 1 | QPIDs_Auckland_1990_2020_augmented.csv | 238.31 | 2021-09-22 09:50:06.900 |
| 2 | road_intersections.pkl | 44.95 | 2021-09-10 15:59:13.120 |
Total: 301.0MB
df = pd.read_csv("restricted/QPIDs_Auckland_1990_2020.csv")
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.CL_Longitude, df.CL_Latitude), crs="EPSG:4326")
df = df.set_index("CL_QPID_output2")
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | |
|---|---|---|---|---|
| CL_QPID_output2 | ||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) |
| ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) |
383272 rows × 4 columns
print(sum(pd.isna(df.CL_Longitude)))
df = df.dropna()
assert len(df) == len(df.index.unique())
print(len(df))
7178 376094
%%time
parcels = gpd.read_file('input/lds-nz-primary-parcels-FGDB.zip!nz-primary-parcels.gdb')
parcels = parcels.to_crs(df.crs)
parcels = parcels.set_index("id")
parcels["parcel_geometry"] = parcels.geometry
CPU times: user 41.1 s, sys: 6.41 s, total: 47.5 s Wall time: 47.8 s
parcels
| appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| 5266447 | None | None | Hydro | Primary | None | North Auckland | None | NaN | 1289690.0 | MULTIPOLYGON (((174.46092 -36.26249, 174.46083... | MULTIPOLYGON (((174.46092 -36.26249, 174.46083... |
| 4789727 | Part Lot 3 Allot 64 Section 1 SBRS OF Auckland | SO 663 | DCDB | Primary | None | North Auckland | None | NaN | 1.0 | MULTIPOLYGON (((174.77836 -36.85187, 174.77838... | MULTIPOLYGON (((174.77836 -36.85187, 174.77838... |
| 4810316 | Part Tidal Lands of Manukau Harbour Survey Off... | SO 67474 | DCDB | Primary | [Create] Revested in the Crown Sec 5 Foreshore... | North Auckland | None | 31600000.0 | 31342451.0 | MULTIPOLYGON (((174.68836 -37.01631, 174.68898... | MULTIPOLYGON (((174.68836 -37.01631, 174.68898... |
| 4817943 | Crown Land Survey Office Plan 58175 | SO 58175 | DCDB | Primary | [Create] Crown Land Reserved from Sale Sec 58 ... | North Auckland | None | NaN | 467490.0 | MULTIPOLYGON (((174.33091 -36.32797, 174.33059... | MULTIPOLYGON (((174.33091 -36.32797, 174.33059... |
| 4827816 | Lot 2 DP 165098 | DP 165098 | DCDB | Primary | None | North Auckland | NA99B/977 | 22979.0 | 22972.0 | MULTIPOLYGON (((174.73406 -36.69245, 174.73408... | MULTIPOLYGON (((174.73406 -36.69245, 174.73408... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7291940 | Lot 745 DP 433546 | DP 433546 | Fee Simple Title | Primary | None | North Auckland | 528968 | 247.0 | 247.0 | MULTIPOLYGON (((174.93787 -37.03903, 174.93763... | MULTIPOLYGON (((174.93787 -37.03903, 174.93763... |
| 7266269 | Lot 533 DP 427884 | DP 427884, LT 454543 | Fee Simple Title | Primary | None | North Auckland | 520875 | 336.0 | 336.0 | MULTIPOLYGON (((174.93990 -37.03814, 174.94015... | MULTIPOLYGON (((174.93990 -37.03814, 174.94015... |
| 8051540 | Lot 8 DP 533517 | DP 533517 | Fee Simple Title | Primary | None | North Auckland | 876934 | 185.0 | 185.0 | MULTIPOLYGON (((174.67269 -36.59236, 174.67269... | MULTIPOLYGON (((174.67269 -36.59236, 174.67269... |
| 7264065 | Lot 518 DP 429024 | DP 429024 | Fee Simple Title | Primary | None | North Auckland | 513803 | 313.0 | 313.0 | MULTIPOLYGON (((174.93753 -37.03880, 174.93747... | MULTIPOLYGON (((174.93753 -37.03880, 174.93747... |
| 7266268 | Lot 532 DP 427884 | DP 427884, LT 454543 | Fee Simple Title | Primary | None | North Auckland | 520874 | 331.0 | 331.0 | MULTIPOLYGON (((174.94009 -37.03794, 174.94015... | MULTIPOLYGON (((174.94009 -37.03794, 174.94015... |
537290 rows × 11 columns
%%time
df = gpd.sjoin(df, parcels, how="left")
CPU times: user 39.6 s, sys: 1.26 s, total: 40.9 s Wall time: 41.2 s
df.parcel_intent.value_counts()
DCDB 276092 Fee Simple Title 99917 Legalisation 79 Road 3 Vesting on Deposit for Local Purpose Reserve 2 Vesting on Deposit for Recreation Reserve (Territorial Authority) 1 Name: parcel_intent, dtype: int64
# Roads shouldn't be in this dataset - might have to give these 3 points a little nudge
sold_roads = df[df.parcel_intent == "Road"]
for i in range(len(sold_roads)):
sold_road = sold_roads.iloc[i:i+1]
display(sold_road)
ax = sold_road.parcel_geometry.to_crs(epsg=3857).plot(alpha=.5)
sold_road.parcel_geometry.centroid.to_crs(epsg=3857).plot(ax=ax, color="red")
sold_road.to_crs(epsg=3857).plot(ax=ax, color="green")
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, zoom=21 if i == 0 else "auto")
ax.set_title("Red = parcel centroid, green = QPID latlong")
plt.show()
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 258637 | 174.84187 | -36.903922 | 2020 | POINT (174.84187 -36.90392) | 7515185 | Section 1 SO 471988 | SO 471988, SO 509515 | Road | Primary | [Create] Land declared Road New Zealand Gazett... | North Auckland | None | 690.0 | 688.0 | MULTIPOLYGON (((174.84229 -36.90402, 174.84226... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.59097 | 2020 | POINT (174.64266 -36.59097) | 6605265 | None | SO 70497 | Road | Primary | [Create] Road New Zealand Gazette 2002 p 4425 | North Auckland | None | NaN | 31611.0 | MULTIPOLYGON (((174.63916 -36.58870, 174.63934... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74900 -36.82376) | 5222196 | None | None | Road | Primary | None | North Auckland | None | NaN | 3462.0 | MULTIPOLYGON (((174.74895 -36.82419, 174.74897... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
df.at[258637, "parcel_intent"] = "Glitch?"
print(f"{1e-4} degrees equates to {df.loc[[1528194]].to_crs(epsg=2193).distance(df.loc[[1528194]].translate(yoff=-1e-4).to_crs(epsg=2193)).iloc[0]} meters")
0.0001 degrees equates to 11.09551261419352 meters
problems = [1528194, 1779952]
display(df.loc[problems])
# Nudge point south a little bit
df.geometry[1528194] = df.loc[[1528194]].translate(yoff=-1e-4)
# Nudge point west a little bit
df.geometry[1779952] = df.loc[[1779952]].translate(xoff=-1e-4)
# Redo the join for these newly adjusted points
subset = df.loc[df.index.isin(problems), ["CL_Longitude", "CL_Latitude", "QPID_vintage", "geometry"]]
df.loc[problems] = gpd.sjoin(subset, parcels)
display(df.loc[problems])
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.590970 | 2020 | POINT (174.64266 -36.59097) | 6605265 | None | SO 70497 | Road | Primary | [Create] Road New Zealand Gazette 2002 p 4425 | North Auckland | None | NaN | 31611.0 | MULTIPOLYGON (((174.63916 -36.58870, 174.63934... |
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74900 -36.82376) | 5222196 | None | None | Road | Primary | None | North Auckland | None | NaN | 3462.0 | MULTIPOLYGON (((174.74895 -36.82419, 174.74897... |
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.590970 | 2020 | POINT (174.64266 -36.59107) | 5201612 | Part Allot 74 PSH OF Waiwera | LT 549520, SO 1693/B | DCDB | Primary | None | North Auckland | NA1129/238 | 2289.0 | 2286.0 | MULTIPOLYGON (((174.64203 -36.59121, 174.64232... |
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74890 -36.82376) | 4839993 | Lot 1 DP 154975 | DP 154975 | DCDB | Primary | None | North Auckland | NA125D/837, NA125D/838 | 811.0 | 760.0 | MULTIPOLYGON (((174.74859 -36.82361, 174.74901... |
df.parcel_intent.value_counts()
DCDB 276094 Fee Simple Title 99917 Legalisation 79 Vesting on Deposit for Local Purpose Reserve 2 Glitch? 1 Vesting on Deposit for Recreation Reserve (Territorial Authority) 1 Name: parcel_intent, dtype: int64
df = df.rename(columns={"titles": "LINZ_parcel_ID"})
df.index_right = df.index_right.astype('Int64')
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) | 5128312 | Part Allot S25 PSH OF Arai | None | DCDB | Primary | None | North Auckland | NA589/73 | NaN | 2913.0 | MULTIPOLYGON (((174.58877 -36.18632, 174.58888... |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) | 5128309 | Lot 2 DP 130303 | DP 130303 | DCDB | Primary | None | North Auckland | NA76B/662 | 9859.0 | 8620.0 | MULTIPOLYGON (((174.58154 -36.20156, 174.58161... |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) | 4823770 | Part Otioro & Te Topuni A2A Block | ML 9928 | DCDB | Primary | None | North Auckland | NA1373/48 | 8296.0 | 7490.0 | MULTIPOLYGON (((174.49514 -36.22889, 174.49701... |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) | 4697657 | Lot 1 DP 44316 | DP 44316 | DCDB | Primary | None | North Auckland | NA1373/47 | 3667.0 | 4152.0 | MULTIPOLYGON (((174.49863 -36.22877, 174.49862... |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) | 5202126 | Lot 1 DP 52926 | DP 52926 | DCDB | Primary | None | North Auckland | NA8B/226 | 3331.0 | 3323.0 | MULTIPOLYGON (((174.49427 -36.23026, 174.49386... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) | 7974171 | Lot 82 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826394 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72812... |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) | 7974172 | Lot 83 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826395 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72814... |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) | 7440747 | Lot 2 DP 460961 | DP 460961 | Fee Simple Title | Primary | None | North Auckland | 605470 | 4500.0 | 4495.0 | MULTIPOLYGON (((174.65635 -36.51025, 174.65624... |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) | 7987560 | Lot 1 DP 533552 | DP 533552 | Fee Simple Title | Primary | None | North Auckland | 878889 | 62163.0 | 62143.0 | MULTIPOLYGON (((174.53978 -36.77485, 174.53974... |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) | 7799795 | Section 36 SO 498829 | SO 498829 | Fee Simple Title | Primary | [Create] Fee Simple Title New Zealand Gazette ... | North Auckland | 900731 | 1460.0 | 1459.0 | MULTIPOLYGON (((174.64432 -36.85494, 174.64428... |
376094 rows × 15 columns
df.LINZ_parcel_ID.sample(10)
CL_QPID_output2 80702 NA11B/1025 2757456 520881 1577173 NA34D/1279, NA34D/1280, NA34D/1281 1628626 NA1836/86 1600205 NA78C/992 205149 NA76D/24, NA79A/586 3072170 766259 149557 NA108C/332 193532 NA95C/798 2303395 126748, 126749 Name: LINZ_parcel_ID, dtype: object
b. Centroid longitude of parcel(s). LINZ_parcel_centroid_lon
c. Centroid latitude of parcel(s). LINZ_parcel_centroid_lat
df["LINZ_parcel_centroid_lon"] = df.parcel_geometry.centroid.x
df["LINZ_parcel_centroid_lat"] = df.parcel_geometry.centroid.y
<string>:1: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. <string>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
sample_parcels = parcels.cx[174.782:174.783, -36.870:-36.871]
ax = sample_parcels.to_crs(epsg=3857).plot(column="parcel_intent", legend=True, alpha=.5, categorical=True, edgecolor="black")
sample_parcels.centroid.to_crs(epsg=3857).plot(ax=ax, color="red")
df[df.index_right.isin(sample_parcels.index)].to_crs(epsg=3857).plot(ax=ax, color="green")
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
plt.title("Red = parcel centroid, green = QPID latlong")
<string>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
Text(0.5, 1.0, 'Red = parcel centroid, green = QPID latlong')
d. vector of longitudes of the vertices of the matched LINZ parcels LINZ_parcel_vertices_lon
e. vector of latitudes of the vertices of the matched LINZ parcels LINZ_parcel_vertices_lat
pd.Series(type(a) for a in df.parcel_geometry).value_counts()
<class 'shapely.geometry.multipolygon.MultiPolygon'> 376094 dtype: int64
%%time
def extract_verts(geometry):
lat = np.nan
lng = np.nan
if geometry:
coordinates = []
for polygon in geometry:
coordinates.extend(polygon.exterior.coords[:-1]) # Drop last point, as it's just the first point again
lng = f"[{'; '.join([str(round(point[0], 6)) for point in coordinates])}]"
lat = f"[{'; '.join([str(round(point[1], 6)) for point in coordinates])}]"
return pd.Series([lng, lat])
df[["LINZ_parcel_vertices_lon", "LINZ_parcel_vertices_lat"]] = df.parcel_geometry.progress_apply(extract_verts)
<string>:6: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
CPU times: user 1min 35s, sys: 3.08 s, total: 1min 39s Wall time: 1min 38s
f. subvector of longitudes of parcel that sits adjacent to a road LINZ_parcel_roadvertices_lon
g. subvector of latitudes of parcel that sits adjacent to a road LINZ_parcel_roadvertices_lat
Let's start with a simple test of one row.
sample = df.iloc[0]
sample
/usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/inference.py:383: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry. iter(obj) # Can iterate over it. /usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/inference.py:384: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. len(obj) # Has a length associated with it. /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:118: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry. s = iter(seq) /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:122: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. for i in range(min(nitems, len(seq))) /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:126: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. if nitems < len(seq):
CL_Longitude 174.588941 CL_Latitude -36.186076 QPID_vintage 2020 geometry POINT (174.588940530675 -36.1860763144825) index_right 5128312 appellation Part Allot S25 PSH OF Arai affected_surveys None parcel_intent DCDB topology_type Primary statutory_actions None land_district North Auckland LINZ_parcel_ID NA589/73 survey_area NaN calc_area 2913.0 parcel_geometry (POLYGON ((174.5887711830001 -36.1863244329999... LINZ_parcel_centroid_lon 174.589185 LINZ_parcel_centroid_lat -36.186039 LINZ_parcel_vertices_lon [174.588771; 174.588883; 174.589331; 174.58955... LINZ_parcel_vertices_lat [-36.186324; -36.185872; -36.18579; -36.186183... Name: 75494, dtype: object
%%time
xmin, ymin, xmax, ymax = sample.parcel_geometry.bounds
search_area = parcels.cx[xmin:xmax, ymin:ymax]
neighbours = search_area[search_area.touches(sample.parcel_geometry)]
neighbours
CPU times: user 12.6 s, sys: 489 ms, total: 13.1 s Wall time: 13.3 s
| appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| 5207551 | None | None | Road | Primary | None | North Auckland | None | NaN | 8063.0 | MULTIPOLYGON (((174.58888 -36.18587, 174.58877... | MULTIPOLYGON (((174.58888 -36.18587, 174.58877... |
| 5224880 | None | None | Road | Primary | None | North Auckland | None | NaN | 10916.0 | MULTIPOLYGON (((174.58967 -36.18646, 174.58967... | MULTIPOLYGON (((174.58967 -36.18646, 174.58967... |
| 4941664 | Lot 2 DP 130126 | DP 130126 | DCDB | Primary | None | North Auckland | NA131B/147 | 21810.0 | 21780.0 | MULTIPOLYGON (((174.59156 -36.18479, 174.59187... | MULTIPOLYGON (((174.59156 -36.18479, 174.59187... |
| 6744495 | Lot 43 DP 340819 | DP 340819, DP 477278, LT 409850 | Fee Simple Title | Primary | None | North Auckland | 661091 | 1529.0 | 1528.0 | MULTIPOLYGON (((174.58907 -36.18508, 174.58920... | MULTIPOLYGON (((174.58907 -36.18508, 174.58920... |
| 7558496 | Lot 16 DP 477278 | DP 477278, LT 494228 | Fee Simple Title | Primary | None | North Auckland | 661091 | 6043.0 | 6041.0 | MULTIPOLYGON (((174.58872 -36.18652, 174.58877... | MULTIPOLYGON (((174.58872 -36.18652, 174.58877... |
search_area.parcel_intent[sample.index_right] = "AOI"
cmap = matplotlib.colors.ListedColormap(['red', 'blue', 'green', 'pink'])
ax = search_area.to_crs(epsg=3857).plot(column="parcel_intent", cmap=cmap, legend=True, categorical=True, alpha=.5)
roads = neighbours[neighbours.parcel_intent == "Road"]
intersection = roads.geometry.intersection(sample.parcel_geometry)
intersection.to_crs(epsg=3857).plot(ax=ax, color="red", linewidth=5)
ctx.add_basemap(ax, url=ctx.providers.Esri.WorldImagery)
<string>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy <string>:7: FutureWarning: The "url" option is deprecated. Please use the "source" argument instead.
%%time
roads = parcels[parcels.parcel_intent == "Road"]
CPU times: user 12.2 s, sys: 1.09 s, total: 13.3 s Wall time: 13.5 s
%%time
if os.path.isfile("restricted/road_intersections.pkl"):
road_intersections = pd.read_pickle("restricted/road_intersections.pkl")
else:
# Computationally intensive - with 16 CPUs - Wall time: 1h 23min 27s
def extract_road_intersections(geom):
if geom:
xmin, ymin, xmax, ymax = geom.bounds
neighbouring_roads = roads.cx[xmin:xmax, ymin:ymax]
return neighbouring_roads.geometry.intersection(geom).unary_union
return np.nan
road_intersections = df.parcel_geometry.parallel_apply(extract_road_intersections)
road_intersections.to_pickle("restricted/road_intersections.pkl")
CPU times: user 8.5 s, sys: 299 ms, total: 8.8 s Wall time: 8.89 s
assert len(road_intersections) == len(df)
types = pd.Series(type(i) for i in road_intersections)
types.index = road_intersections.index
types.value_counts()
<class 'shapely.geometry.linestring.LineString'> 199991 <class 'shapely.geometry.multilinestring.MultiLineString'> 141418 <class 'NoneType'> 32985 <class 'shapely.geometry.collection.GeometryCollection'> 1290 <class 'shapely.geometry.point.Point'> 393 <class 'shapely.geometry.multipoint.MultiPoint'> 16 <class 'shapely.geometry.polygon.Polygon'> 1 dtype: int64
display(df[types == Polygon])
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||
| 258637 | 174.84187 | -36.903922 | 2020 | POINT (174.84187 -36.90392) | 7515185 | Section 1 SO 471988 | SO 471988, SO 509515 | Glitch? | Primary | [Create] Land declared Road New Zealand Gazett... | North Auckland | None | 690.0 | 688.0 | MULTIPOLYGON (((174.84229 -36.90402, 174.84226... | 174.841943 | -36.903956 | [174.842286; 174.842257; 174.84188; 174.841667... | [-36.904017; -36.904065; -36.904015; -36.90396... |
geom = df.parcel_geometry[258637]
xmin, ymin, xmax, ymax = geom.bounds
neighbouring_roads = roads.cx[xmin:xmax, ymin:ymax]
neighbouring_roads = neighbouring_roads[neighbouring_roads.touches(geom)]
road_intersections[258637] = neighbouring_roads.geometry.intersection(geom).unary_union
types[258637] = type(road_intersections[258637])
types.value_counts()
<class 'shapely.geometry.linestring.LineString'> 199991 <class 'shapely.geometry.multilinestring.MultiLineString'> 141419 <class 'NoneType'> 32985 <class 'shapely.geometry.collection.GeometryCollection'> 1290 <class 'shapely.geometry.point.Point'> 393 <class 'shapely.geometry.multipoint.MultiPoint'> 16 dtype: int64
collections = road_intersections[types == GeometryCollection]
collection_types = []
for c in collections:
collection_types.extend(type(o) for o in c)
pd.Series(collection_types).value_counts()
<string>:3: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
<class 'shapely.geometry.linestring.LineString'> 1245 <class 'shapely.geometry.point.Point'> 572 <class 'shapely.geometry.polygon.Polygon'> 2 dtype: int64
for i, c in collections.iteritems():
if Polygon in [type(o) for o in c]:
print(i)
2211389 2232849
<string>:2: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
problems = df.loc[[2211389, 2232849]]
display(problems)
xmin, ymin, xmax, ymax = problems.parcel_geometry.unary_union.bounds
search_area = parcels.cx[xmin:xmax, ymin:ymax]
search_area.parcel_intent[problems.index_right] = "AOI"
cmap = matplotlib.colors.ListedColormap(['red', 'blue', 'green', 'pink'])
ax = search_area.to_crs(epsg=3857).plot(column="parcel_intent", cmap=cmap, legend=True, categorical=True, alpha=.5, edgecolor="black")
roads = search_area[search_area.parcel_intent == "Road"]
intersection = roads.geometry.intersection(problems.parcel_geometry.unary_union)
intersection.to_crs(epsg=3857).plot(ax=ax, color="red", linewidth=5)
intersection.boundary.to_crs(epsg=3857).plot(ax=ax, color="blue", linewidth=5)
display(intersection)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||
| 2211389 | 174.690915 | -36.764765 | 2020 | POINT (174.69092 -36.76476) | 6605986 | Lot 109 DP 310598 | DP 310598 | Fee Simple Title | Primary | None | North Auckland | 41583 | 603.0 | 603.0 | MULTIPOLYGON (((174.69072 -36.76480, 174.69098... | 174.690924 | -36.764765 | [174.69072; 174.690984; 174.691061; 174.691117... | [-36.764801; -36.764606; -36.764662; -36.76473... |
| 2232849 | 174.690710 | -36.764933 | 2020 | POINT (174.69071 -36.76493) | 6616860 | Lot 68 DP 312607 | DP 312607 | Fee Simple Title | Primary | None | North Auckland | 49611 | 502.0 | 502.0 | MULTIPOLYGON (((174.69053 -36.76494, 174.69072... | 174.690721 | -36.764940 | [174.69053; 174.69072; 174.690863; 174.690907;... | [-36.764943; -36.764801; -36.764926; -36.76496... |
<string>:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /usr/local/lib/python3.8/dist-packages/pandas/core/series.py:1135: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self._set_values(indexer, value)
id 6616875 POLYGON ((174.69072 -36.76480, 174.69053 -36.7... 6606001 MULTILINESTRING ((174.69112 -36.76474, 174.691... 6616867 MULTILINESTRING ((174.69053 -36.76494, 174.690... 6606004 POLYGON ((174.69072 -36.76480, 174.69072 -36.7... dtype: geometry
%%time
LINZ_parcel_roadvertices_lon = []
LINZ_parcel_roadvertices_lat = []
for intersection in tqdm(road_intersections):
lat = np.nan
lon = np.nan
if intersection:
lat = []
lon = []
try:
if type(intersection) in (LineString, Point):
lon.extend(intersection.xy[0])
lat.extend(intersection.xy[1])
elif type(intersection) in (MultiLineString, GeometryCollection, MultiPoint):
for p in intersection.geoms:
if type(p) == Polygon:
lon.extend(p.exterior.xy[0][:-1])
lat.extend(p.exterior.xy[1][:-1])
else:
lon.extend(p.xy[0])
lat.extend(p.xy[1])
else:
raise Exception(f"Don't know how to handle {type(intersection)}")
except:
print(type(intersection))
print(intersection)
raise
lon = f"[{'; '.join([str(round(lon, 6)) for lon in lon])}]"
lat = f"[{'; '.join([str(round(lat, 6)) for lat in lat])}]"
LINZ_parcel_roadvertices_lon.append(lon)
LINZ_parcel_roadvertices_lat.append(lat)
df["LINZ_parcel_roadvertices_lon"] = LINZ_parcel_roadvertices_lon
df["LINZ_parcel_roadvertices_lat"] = LINZ_parcel_roadvertices_lat
CPU times: user 42.6 s, sys: 805 ms, total: 43.4 s Wall time: 43.3 s
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | LINZ_parcel_roadvertices_lon | LINZ_parcel_roadvertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) | 5128312 | Part Allot S25 PSH OF Arai | None | DCDB | Primary | None | North Auckland | NA589/73 | NaN | 2913.0 | MULTIPOLYGON (((174.58877 -36.18632, 174.58888... | 174.589185 | -36.186039 | [174.588771; 174.588883; 174.589331; 174.58955... | [-36.186324; -36.185872; -36.18579; -36.186183... | [174.588883; 174.588771; 174.589551; 174.58933... | [-36.185872; -36.186324; -36.186183; -36.18579... |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) | 5128309 | Lot 2 DP 130303 | DP 130303 | DCDB | Primary | None | North Auckland | NA76B/662 | 9859.0 | 8620.0 | MULTIPOLYGON (((174.58154 -36.20156, 174.58161... | 174.581884 | -36.200845 | [174.581541; 174.581606; 174.581586; 174.58184... | [-36.201565; -36.201229; -36.201017; -36.19972... | [174.581841; 174.581586; 174.581586; 174.58160... | [-36.199726; -36.201017; -36.201017; -36.20122... |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) | 4823770 | Part Otioro & Te Topuni A2A Block | ML 9928 | DCDB | Primary | None | North Auckland | NA1373/48 | 8296.0 | 7490.0 | MULTIPOLYGON (((174.49514 -36.22889, 174.49701... | 174.496751 | -36.228824 | [174.495137; 174.497012; 174.497667; 174.49766... | [-36.228888; -36.228552; -36.228639; -36.22893... | [174.497667; 174.497012; 174.497012; 174.495137] | [-36.228639; -36.228552; -36.228552; -36.228888] |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) | 4697657 | Lot 1 DP 44316 | DP 44316 | DCDB | Primary | None | North Auckland | NA1373/47 | 3667.0 | 4152.0 | MULTIPOLYGON (((174.49863 -36.22877, 174.49862... | 174.498193 | -36.228933 | [174.498626; 174.498615; 174.498386; 174.49766... | [-36.228766; -36.229337; -36.22924; -36.228936... | [174.498626; 174.497667] | [-36.228766; -36.228639] |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) | 5202126 | Lot 1 DP 52926 | DP 52926 | DCDB | Primary | None | North Auckland | NA8B/226 | 3331.0 | 3323.0 | MULTIPOLYGON (((174.49427 -36.23026, 174.49386... | 174.494172 | -36.230010 | [174.49427; 174.493857; 174.493618; 174.493729... | [-36.230265; -36.230116; -36.230056; -36.22977... | [174.494901; 174.493729] | [-36.229975; -36.229771] |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) | 7974171 | Lot 82 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826394 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72812... | 174.728112 | -36.408104 | [174.728128; 174.728123; 174.728076; 174.72808... | [-36.408145; -36.408144; -36.408135; -36.40810... | NaN | NaN |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) | 7974172 | Lot 83 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826395 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72814... | 174.728164 | -36.408113 | [174.728128; 174.728137; 174.728139; 174.72814... | [-36.408145; -36.408111; -36.408106; -36.40807... | NaN | NaN |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) | 7440747 | Lot 2 DP 460961 | DP 460961 | Fee Simple Title | Primary | None | North Auckland | 605470 | 4500.0 | 4495.0 | MULTIPOLYGON (((174.65635 -36.51025, 174.65624... | 174.655985 | -36.510014 | [174.656354; 174.656238; 174.656031; 174.65582... | [-36.510252; -36.510457; -36.510421; -36.51039... | [174.656154; 174.656017; 174.656017; 174.65593... | [-36.509474; -36.50944; -36.50944; -36.509423;... |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) | 7987560 | Lot 1 DP 533552 | DP 533552 | Fee Simple Title | Primary | None | North Auckland | 878889 | 62163.0 | 62143.0 | MULTIPOLYGON (((174.53978 -36.77485, 174.53974... | 174.538969 | -36.773813 | [174.539778; 174.539738; 174.539702; 174.53967... | [-36.774848; -36.774902; -36.774953; -36.77500... | [174.539778; 174.539832; 174.539832; 174.53988... | [-36.774848; -36.774768; -36.774768; -36.77468... |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) | 7799795 | Section 36 SO 498829 | SO 498829 | Fee Simple Title | Primary | [Create] Fee Simple Title New Zealand Gazette ... | North Auckland | 900731 | 1460.0 | 1459.0 | MULTIPOLYGON (((174.64432 -36.85494, 174.64428... | 174.644048 | -36.854825 | [174.64432; 174.644277; 174.644156; 174.643992... | [-36.854943; -36.854953; -36.854981; -36.85501... | [174.643415; 174.643431] | [-36.854967; -36.854998] |
376094 rows × 21 columns
h. AUP Zone Code of adjoining parcels (this includes residential, business, and rural zones; it should also include roads, water and open spaces) LINZ_parcel_sides_zones
%%time
zones = gpd.read_file("input/MASTER_UP_BaseZone_SHP.zip")
zones = zones.to_crs(parcels.crs)
zones
CPU times: user 28.6 s, sys: 4 s, total: 32.6 s Wall time: 32.8 s
| OBJECTID | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | last_edite | NAME | PARCEL_BAS | PARCEL_B_1 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT_1 | SCHEDULE | STATUS | SUBPRECINC | SUBPRECI_1 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VERSIONSTA | VERSIONS_1 | ZONE | ZONE_resol | ZONEHEIGHT | SHAPE_Leng | SHAPE_Area | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | None | 20160718211 | None | {4C8F9436-7EA6-417E-B64F-15FCD44459F6} | 2 | Residential | 20161111010 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 60 | Residential - Mixed Housing Urban Zone | NaN | 285.664016 | 2.050275e+03 | POLYGON ((174.88890 -37.02031, 174.88893 -37.0... |
| 1 | 2.0 | None | 20160718211 | None | {604AAD87-8ED4-4111-8276-47CEE7E81F92} | 1 | Public Open Space | 20161111010 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 33 | Open Space - Sport and Active Recreation Zone | NaN | 1246.837757 | 1.684599e+04 | POLYGON ((174.84254 -36.85175, 174.84198 -36.8... |
| 2 | 3.0 | None | 20160718211 | None | {8D827DA8-BC5B-437A-B17A-532354F7D037} | 4 | Rural | 20161111010 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 11 | Rural - Mixed Rural Zone | NaN | 3582.113246 | 6.841744e+05 | POLYGON ((174.56993 -36.78067, 174.56991 -36.7... |
| 3 | 4.0 | None | 20160718211 | None | {96C9E266-3341-4C71-94F1-325F2EE45732} | 2 | Residential | 20161111010 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 23 | Residential - Large Lot Zone | NaN | 317.098469 | 6.024226e+03 | POLYGON ((174.68648 -36.77575, 174.68596 -36.7... |
| 4 | 5.0 | None | 20160718211 | None | {90B50FEE-45A3-4E88-819A-370751ACDE3D} | 1 | Public Open Space | 20161111010 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 31 | Open Space - Conservation Zone | NaN | 230.836636 | 5.639794e+02 | POLYGON ((174.58715 -36.87164, 174.58733 -36.8... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 130295 | 130296.0 | None | 20161115151 | None | {B2F0FB45-80F6-41ED-AA57-A914B195B31E} | 1 | Public Open Space | 20161115151 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 31 | Open Space - Conservation Zone | NaN | 179382.307787 | 9.040193e+07 | POLYGON ((174.55200 -37.01270, 174.55225 -37.0... |
| 130296 | 130297.0 | None | 20161115151 | None | {A3F4EF61-9162-43C3-90BD-DC84D93C6A64} | 2 | Residential | 20161115151 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 18 | Residential - Mixed Housing Suburban Zone | NaN | 2769.292040 | 2.920602e+05 | POLYGON ((174.91212 -36.97863, 174.91223 -36.9... |
| 130297 | 130298.0 | None | 20161115151 | None | {A5FC7EB5-76E5-414C-96DD-715C671764B4} | 4 | Rural | 20161115151 | Kaipara South Head and Harbour coastal area | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 46 | Rural - Rural Coastal Zone | NaN | 27168.762393 | 6.344938e+06 | POLYGON ((174.45505 -36.53462, 174.45506 -36.5... |
| 130298 | 130299.0 | None | 20161115151 | None | {C371B83C-35CE-46A1-B94F-F1E592F271F7} | 2 | Residential | 20161115151 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 8 | Residential - Terrace Housing and Apartment Bu... | NaN | 1812.109533 | 1.536829e+05 | POLYGON ((174.60367 -36.81788, 174.60326 -36.8... |
| 130299 | 130300.0 | None | 20161115151 | None | {31281924-C74D-4038-8260-FE6D886F4C94} | 2 | Residential | 20161115151 | None | None | None | None | None | None | None | None | None | None | None | None | None | None | None | 4 | Operative | 18 | Residential - Mixed Housing Suburban Zone | NaN | 2511.267945 | 3.539606e+05 | POLYGON ((174.67000 -36.80217, 174.67007 -36.8... |
130300 rows × 31 columns
%%time
df.geometry = df.parcel_geometry
df = df.drop(columns="index_right")
df.sindex
parcels.sindex
df.has_sindex, parcels.has_sindex
CPU times: user 21.3 s, sys: 1.5 s, total: 22.8 s Wall time: 23 s
(True, True)
%%time
touches = gpd.sjoin(df, parcels, op="touches").parcel_geometry_right
CPU times: user 12min 50s, sys: 8.1 s, total: 12min 58s Wall time: 12min 59s
touches
CL_QPID_output2
75494 MULTIPOLYGON (((174.58967 -36.18646, 174.58967...
75494 MULTIPOLYGON (((174.58888 -36.18587, 174.58877...
75494 MULTIPOLYGON (((174.58872 -36.18652, 174.58877...
75494 MULTIPOLYGON (((174.59156 -36.18479, 174.59187...
75494 MULTIPOLYGON (((174.58907 -36.18508, 174.58920...
...
3160630 MULTIPOLYGON (((174.65635 -36.51025, 174.65623...
3164546 MULTIPOLYGON (((174.53767 -36.77386, 174.53780...
3164546 MULTIPOLYGON (((174.53752 -36.77150, 174.53816...
3164546 MULTIPOLYGON (((174.53426 -36.77049, 174.53367...
3166852 MULTIPOLYGON (((174.64341 -36.85497, 174.64335...
Name: parcel_geometry_right, Length: 2076816, dtype: geometry
sample = df.loc[[75494]]
ax = sample.to_crs(epsg=3857).plot(color="red", alpha=.5, edgecolor="black")
touches[[75494]].to_crs(epsg=3857).plot(color="blue", alpha=.5, edgecolor="black", ax=ax)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
touches_df = gpd.GeoDataFrame(geometry=touches.representative_point(), crs=df.crs)
print(len(touches_df), len(zones))
2076816 130300
%%time
touched_zones = gpd.sjoin(touches_df, zones, op="within")
CPU times: user 32.9 s, sys: 899 ms, total: 33.8 s Wall time: 33.9 s
df["LINZ_parcel_sides_zones"] = touched_zones.groupby("CL_QPID_output2").ZONE.apply(list)
df["LINZ_parcel_sides_zones"]
CL_QPID_output2
75494 [27, 27, 16, 16, 16]
75499 [16, 16, 27, 27, 27]
75639 [16, 16, 16, 27]
75640 [16, 16, 16, 27]
75654 [16, 16, 26]
...
3160123 [46, 18]
3160124 [46, 18]
3160630 [27, 20, 20, 20, 20, 20]
3164546 [27, 18, 18, 18, 26, 26, 26]
3166852 [19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 27]
Name: LINZ_parcel_sides_zones, Length: 376094, dtype: object
df["LINZ_parcel_sides_zones"].str.len().value_counts()
5 110872 4 90033 6 71883 7 34022 3 28509 8 15928 9 8615 10 4710 11 2777 12 1787 13 1288 14 1052 2 970 15 696 16 609 17 581 18 358 19 266 29 249 21 232 24 136 20 116 22 61 1 51 28 49 40 41 37 34 30 30 23 29 58 21 25 21 26 17 36 15 46 11 31 8 44 6 27 3 54 2 38 1 59 1 35 1 61 1 41 1 53 1 Name: LINZ_parcel_sides_zones, dtype: int64
zones[["ZONE", "ZONE_resol"]].value_counts()
ZONE ZONE_resol 27 Road 47012 30 Coastal - General Coastal Marine Zone 26326 59 Coastal - Coastal Transition Zone 15703 18 Residential - Mixed Housing Suburban Zone 9775 60 Residential - Mixed Housing Urban Zone 5864 26 Strategic Transport Corridor Zone 4215 19 Residential - Single House Zone 3766 31 Open Space - Conservation Zone 3063 32 Open Space - Informal Recreation Zone 2926 8 Residential - Terrace Housing and Apartment Building Zone 2134 16 Rural - Rural Production Zone 1025 25 Water 1013 12 Business - Mixed Use Zone 782 46 Rural - Rural Coastal Zone 768 17 Business - Light Industry Zone 593 44 Business - Neighbourhood Centre Zone 564 20 Residential - Rural and Coastal Settlement Zone 480 33 Open Space - Sport and Active Recreation Zone 477 22 Business - Town Centre Zone 432 23 Residential - Large Lot Zone 379 11 Rural - Mixed Rural Zone 344 3 Rural - Countryside Living Zone 326 4 Future Urban Zone 282 7 Business - Local Centre Zone 266 43 Hauraki Gulf Islands 221 35 Business - City Centre Zone 191 34 Open Space - Community Zone 183 69 Rural - Waitakere Ranges Zone 172 10 Business - Metropolitan Centre Zone 160 5 Business - Heavy Industry Zone 130 63 Special Purpose - School Zone 126 41 Coastal - Mooring Zone 124 49 Business - General Business Zone 103 15 Rural - Rural Conservation Zone 71 68 Rural - Waitakere Foothills Zone 44 53 Special Purpose - Cemetery Zone 41 51 Special Purpose - Quarry Zone 34 52 Special Purpose - Maori Purpose Zone 32 54 Special Purpose - Major Recreation Facility Zone 28 55 Special Purpose - Healthcare Facility and Hospital Zone 23 45 Coastal - Ferry Terminal Zone 20 40 Coastal - Marina Zone 20 62 Open Space - Civic Spaces Zone 18 1 Business - Business Park Zone 16 56 Special Purpose - Airports and Airfields Zone 9 61 Green Infrastructure Corridor 6 64 Special Purpose - Tertiary Education Zone 4 37 Coastal - Minor Port Zone 4 39 Coastal - Defence Zone 3 dtype: int64
sample = df[df["LINZ_parcel_sides_zones"].str.len() == 1].iloc[[0]]
sample
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | LINZ_parcel_roadvertices_lon | LINZ_parcel_roadvertices_lat | LINZ_parcel_sides_zones | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||||
| 83639 | 174.432209 | -36.43788 | 2020 | MULTIPOLYGON (((174.43214 -36.43812, 174.43212... | Section 29 Block X Tauhoa SD | SO 57581 | DCDB | Primary | None | North Auckland | NA55B/702 | 1961.0 | 1958.0 | MULTIPOLYGON (((174.43214 -36.43812, 174.43212... | 174.432304 | -36.437643 | [174.432142; 174.432116; 174.432159; 174.43261... | [-36.438119; -36.437691; -36.43738; -36.437394... | [174.432116; 174.432142; 174.432142; 174.43254... | [-36.437691; -36.438119; -36.438119; -36.43757... | [26] |
ax = sample.to_crs(epsg=3857).plot(color="red", alpha=.5, edgecolor="black")
touches[[83639]].to_crs(epsg=3857).plot(color="blue", alpha=.5, edgecolor="black", ax=ax)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
i. Transpower GIS
i. Indicator (0 or 1) for LINZ parcel located under overhead transpower line. Leave blank otherwise. Note that ‘TRANSLINE’ denotes overhead transmission lines in the GIS dataset, while ‘CABLE’ denotes underground transmission lines and can be ignored. LINZ_TRNSPWR_ohead_indicator
ii. Name (‘description’ in the GIS dataset) of the overhead transmission line (e.g. ‘Henderson - Otahuhu A’). LINZ_TRNSPWR_ohead_name
power = gpd.read_file('input/Transmission_Lines_exTRANSPOWER.zip!Transmission_Lines.shp')
# only interested in overhead
power = power.to_crs(df.crs)
power = power[power['type'] == 'TRANSLINE']
power.sample(3)
| OBJECTID | MXLOCATION | designvolt | status | descriptio | type | Symbol | Shape__Len | GlobalID | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|
| 208 | 220 | EDG-WAI-B | 110 | COMMISSIONED | Edgecumbe - Waiotahi B | TRANSLINE | 110 TRANSLINE | 35235.792412 | 68b2a4c5-9560-4745-a33d-8bb00a7ac1b8 | MULTILINESTRING ((177.18929 -38.02690, 177.192... |
| 177 | 188 | BPE-WRK-A | 220 | COMMISSIONED | Bunnythorpe - Wairakei A | TRANSLINE | 220 TRANSLINE | 215057.139858 | 40a67dcb-12a1-449f-a6f0-b59d9f52d9c9 | MULTILINESTRING ((175.64101 -40.28143, 175.641... |
| 99 | 101 | INV-TWI-A | 220 | COMMISSIONED | Invercargill - Tiwai A | TRANSLINE | 220 TRANSLINE | 25227.543630 | 0846aef0-61e2-42ab-8c28-f33b070bfe3d | MULTILINESTRING ((168.39283 -46.58953, 168.392... |
%%time
df = gpd.sjoin(df, power[["descriptio", "geometry"]], how="left")
CPU times: user 58 s, sys: 577 ms, total: 58.6 s Wall time: 58.6 s
# Sometimes multiple powerlines overlap a given parcel. Just take the first.
df = df[~df.index.duplicated(keep='first')]
df["LINZ_TRNSPWR_ohead_indicator"] = ~pd.isna(df.descriptio)
df["LINZ_TRNSPWR_ohead_indicator"].value_counts()
False 374159 True 1935 Name: LINZ_TRNSPWR_ohead_indicator, dtype: int64
df = df.rename(columns={"descriptio": "LINZ_TRNSPWR_ohead_name"})
df["LINZ_TRNSPWR_ohead_name"].value_counts()
Henderson - Otahuhu A 251 Henderson - Hepburn Road A 221 Bombay - Otahuhu A 199 Henderson - Mt Roskill A 173 Huntly - Otahuhu A 171 Penrose - Mt Roskill A 148 Hepburn Road - Mt Roskill A 138 Mangere - Mt Roskill A 136 Otahuhu - Penrose C 78 Otahuhu - Whakamaru A 60 Otahuhu - Penrose A 56 Otahuhu - Whakamaru C 56 Henderson - Maungatapere A 53 Henderson - Marsden A 45 Otahuhu - Whakamaru B 43 Albany - Henderson A 37 Otahuhu - Penrose B 37 Mangere - Otahuhu A 28 Albany - Huapai A 3 Albany - Silverdale A 2 Name: LINZ_TRNSPWR_ohead_name, dtype: int64
j. Volcanic Cone and Museum Viewshaft GIS
i. Indicator (0 or 1) for LINZ parcel located under viewshafts. Leave blank otherwise. LINZ_VWSHFT_ohead_indicator
ii. Name of the volcanic cone (e.g. Mt Albert). Leave blank if no viewshaft applies. LINZ_VWSHFT_ohead_name
iii. OBJECTID of the viewshaft. Leave blank if no viewshaft applies. LINZ_VWSHFT_ohead_ID
%%time
museum = gpd.read_file("input/AUP_viewshafts.zip!MASTER_UP_AucklandMuseumViewshaftOverlay.shp")
local = gpd.read_file("input/AUP_viewshafts.zip!MASTER_UP_LocallySignificantVolcanicViewshafts.shp")
regional = gpd.read_file("input/AUP_viewshafts.zip!MASTER_UP_RegionallySignificantVolcanicViewShaftsAndHeightSensitiveAreasOverlay.shp")
museum.NAME = "Auckland Museum"
viewshafts = pd.concat([museum, local, regional])
viewshafts = viewshafts.to_crs(df.crs)
viewshafts
CPU times: user 9.31 s, sys: 258 ms, total: 9.56 s Wall time: 9.6 s
| last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 20161109025 | None | 20160517020 | None | {F621DB4A-42B1-4587-AF6A-5C2DC03EB556} | None | None | None | Auckland Museum | 1 | None | None | None | None | None | None | None | 6.945553e+06 | 11955.264438 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | None | None | None | POLYGON ((174.77185 -36.83347, 174.77362 -36.8... |
| 0 | 20161113203 | NaN | 20160531000 | None | {0465297C-4FBE-4547-918D-0AC7C27D9F89} | NaN | NaN | NaN | One Tree Hill | 1 | NaN | NaN | NaN | NaN | NaN | NaN | O5 | 4.067488e+05 | 4142.416833 | NaN | NaN | NaN | None | NaN | None | NaN | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.78204 -36.90157, 174.78197 -36.9... |
| 1 | 20161113203 | NaN | 20160531000 | None | {859026B0-3810-4D0A-A882-E607F452A7AE} | NaN | NaN | NaN | One Tree Hill | 2 | NaN | NaN | NaN | NaN | NaN | NaN | O10 | 3.435680e+06 | 12427.605528 | NaN | NaN | NaN | None | NaN | None | NaN | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.78550 -36.90443, 174.78169 -36.8... |
| 2 | 20161113203 | NaN | 20160531000 | None | {BA7883B6-9B97-4966-B06A-CBE0146A9180} | NaN | NaN | NaN | Rangitoto Island | 3 | NaN | NaN | NaN | NaN | NaN | NaN | T8 | 6.998701e+06 | 17350.393792 | NaN | NaN | NaN | None | NaN | None | NaN | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.86535 -36.82495, 174.86026 -36.8... |
| 3 | 20161113203 | NaN | 20160530235 | None | {BF3FE3B5-8C80-4547-9A2D-9A499D2A2D30} | NaN | NaN | NaN | Mount Wellington | 4 | NaN | NaN | NaN | NaN | NaN | NaN | W13 | 1.069608e+06 | 7569.100897 | NaN | NaN | NaN | None | NaN | None | NaN | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.84898 -36.89456, 174.84891 -36.8... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 105 | 20161109010 | NaN | 20160531002 | None | {D6266948-3BAB-45C1-AFB4-E5A02E3F4AC6} | NaN | NaN | NaN | Mount Hobson | 106 | NaN | NaN | NaN | NaN | NaN | NaN | H3 | 3.556122e+05 | 6312.342912 | NaN | NaN | NaN | None | NaN | 4 | Viewshafts | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.79960 -36.85303, 174.80091 -36.8... |
| 106 | 20161109010 | NaN | 20160531002 | None | {25B4D37C-EEE6-4EF2-B054-F32772B5C353} | NaN | NaN | NaN | Mount Victoria | 107 | NaN | NaN | NaN | NaN | NaN | NaN | V1 | 5.576748e+05 | 7020.534172 | NaN | NaN | NaN | None | NaN | 4 | Viewshafts | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.79735 -36.82703, 174.79731 -36.8... |
| 107 | 20161109010 | NaN | 20160531002 | None | {6ECB66B5-574A-46AB-BDF8-CCAC62F6F907} | NaN | NaN | NaN | Mount Eden | 108 | NaN | NaN | NaN | NaN | NaN | NaN | E8 | 1.550483e+06 | 12560.510685 | NaN | NaN | NaN | None | NaN | 4 | Viewshafts | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.79877 -36.83164, 174.80019 -36.8... |
| 108 | 20161109010 | NaN | 20160531002 | None | {042DA6E8-7FC9-48CE-B991-5EE1BAD01BD6} | NaN | NaN | NaN | Mount Eden | 109 | NaN | NaN | NaN | NaN | NaN | NaN | E14 | 6.768010e+05 | 3771.612175 | NaN | NaN | NaN | None | NaN | 4 | Viewshafts | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.77713 -36.87383, 174.77817 -36.8... |
| 109 | 20161109010 | NaN | 20160531002 | None | {A15A8EAC-42E7-482F-950C-74D557D7CA34} | NaN | NaN | NaN | Mount Wellington | 110 | NaN | NaN | NaN | NaN | NaN | NaN | W3 | 2.266084e+05 | 2601.139443 | NaN | NaN | NaN | None | NaN | 4 | Viewshafts | 3 | Valid and Public | 4 | Operative | NaN | NaN | NaN | POLYGON ((174.83530 -36.89644, 174.84523 -36.8... |
116 rows × 34 columns
len(df)
376094
%%time
df = gpd.sjoin(df.drop(columns="index_right"), viewshafts[["NAME", "OBJECTID", "geometry"]], how="left")
CPU times: user 32.8 s, sys: 688 ms, total: 33.5 s Wall time: 33.5 s
# Sometimes multiple viewshafts overlap a given parcel. Just take the first.
df = df[~df.index.duplicated(keep='first')]
df["LINZ_VWSHFT_ohead_indicator"] = ~pd.isna(df.NAME)
df = df.rename(columns={"NAME": "LINZ_VWSHFT_ohead_name", "OBJECTID": "LINZ_VWSHFT_ohead_ID"})
df["LINZ_VWSHFT_ohead_indicator"].value_counts()
False 327913 True 48181 Name: LINZ_VWSHFT_ohead_indicator, dtype: int64
df["LINZ_VWSHFT_ohead_name"].value_counts()
Mount Eden 16083 Mount Wellington 10099 One Tree Hill 7709 Mount Albert 3206 Rangitoto Island 2648 Auckland Museum 1772 Mount Victoria 1759 Mangere Mountain 1645 Mount Hobson 1605 Browns Island 491 Mount Mangere 417 Big King 383 Mount Roskill 353 Mount Richmond 11 Name: LINZ_VWSHFT_ohead_name, dtype: int64
df.keys()
Index(['CL_Longitude', 'CL_Latitude', 'QPID_vintage', 'geometry',
'appellation', 'affected_surveys', 'parcel_intent', 'topology_type',
'statutory_actions', 'land_district', 'LINZ_parcel_ID', 'survey_area',
'calc_area', 'parcel_geometry', 'LINZ_parcel_centroid_lon',
'LINZ_parcel_centroid_lat', 'LINZ_parcel_vertices_lon',
'LINZ_parcel_vertices_lat', 'LINZ_parcel_roadvertices_lon',
'LINZ_parcel_roadvertices_lat', 'LINZ_parcel_sides_zones',
'LINZ_TRNSPWR_ohead_name', 'LINZ_TRNSPWR_ohead_indicator',
'index_right', 'LINZ_VWSHFT_ohead_name', 'LINZ_VWSHFT_ohead_ID',
'LINZ_VWSHFT_ohead_indicator'],
dtype='object')
df.drop(columns=["geometry", "parcel_geometry"]).to_csv("restricted/QPIDs_Auckland_1990_2020_augmented.csv")